---
title: "Enrolment and Baseline Summaries"
description: |
  Exploratory summaries of enrolments and baseline data.
author: "James Totterdell"
date: today
---
```{r}
#| label: pkgs
#| code-summary: Load packages
library(tidyverse)
library(patchwork)
library(DT)
library(plotly)
library(knitr)
library(lubridate)
library(kableExtra)
# Document-wide ggplot2 theme: classic theme at 10pt in Palatino, with panel
# grid lines and facet strip backgrounds removed.
plain_panels <- theme(
  panel.grid = element_blank(),
  strip.background = element_blank()
)
theme_set(
  theme_classic(base_size = 10, base_family = "Palatino") + plain_panels
)
```
```{r}
#| label: load-data
#| code-summary: Load data
# Load the package under development from the working directory.
# NOTE(review): the project helpers called throughout this document are not
# defined in this file, so they presumably come from this package -- confirm.
devtools::load_all()
# Data sets used by every later chunk.
# NOTE(review): judging by the helper names, `all_data` excludes the daily
# records and `all_daily_data` contains them -- confirm against the helpers.
all_data <- read_all_no_daily()
all_daily_data <- read_all_daily()
```
# Enrolment
```{r}
#| label: fig-enrolment
#| code-summary: Study enrolment
#| fig-cap-location: margin
#| fig-cap: |
#|   Cumulative enrolments to the platform,
#|   vertical lines indicate timing of interim analyses
#| fig-height: 7

# Interim analysis dates (`md`) and intervention date ranges (`id`).
# `md` and the `p2` panel built below are reused by later chunks.
md <- get_interim_dates()
id <- get_intervention_dates()

# Number of randomisations per day, padded with zero-count days up to
# 2022-04-09 so that the cumulative sum runs over a complete daily sequence.
enrol_daily <- all_data %>%
  filter(ENR_rec == 1, CAssignment != "C0") %>%
  dplyr::count(RandDate) %>%
  complete(
    RandDate = seq.Date(
      min(RandDate, na.rm = TRUE),
      as.Date("2022-04-09"),
      by = "1 day"
    ),
    fill = list(n = 0)
  ) %>%
  mutate(`Cumulative enrolments` = cumsum(n)) %>%
  rename(`Randomisation date` = RandDate)

# Top panel: cumulative enrolments with dashed lines at the interim dates.
p1 <- ggplot(enrol_daily, aes(`Randomisation date`, `Cumulative enrolments`)) +
  geom_step() +
  geom_vline(
    data = md,
    aes(xintercept = as.numeric(meet_date)),
    linetype = 2
  ) +
  labs(x = "", subtitle = "Cumulative enrolments", y = "")

# Bottom panel: one segment per intervention from `stdate` to `endate`,
# faceted by domain and flipped so that dates run along the horizontal axis.
intervention_dates <- id %>%
  mutate(Intervention = fct_inorder(val_labels(Intervention)))

p2 <- ggplot(intervention_dates) +
  facet_grid(Domain ~ ., drop = TRUE, scales = "free_y", space = "free_y") +
  # Cross marker at the end date, drawn for every domain except "Antiviral".
  geom_point(
    data = function(d) filter(d, Domain != "Antiviral"),
    aes(x = Intervention, y = endate),
    shape = 4
  ) +
  geom_segment(
    aes(x = Intervention, xend = Intervention, y = stdate, yend = endate)
  ) +
  geom_hline(
    data = md,
    aes(yintercept = as.numeric(meet_date)),
    linetype = 2
  ) +
  coord_flip() +
  labs(x = "", y = "Calendar date", subtitle = "Available interventions")

# Stack the panels (patchwork), save a PDF copy and display the figure.
p <- p1 / p2
pth <- file.path("outputs", "figures", "baseline")
ggsave(file.path(pth, "enrolment-overall.pdf"), p, height = 4.5, width = 6)
p
```
```{r}
#| label: fig-enrolment-ovr-ctry
#| code-summary: Study enrolment
#| fig-cap-location: margin
#| fig-cap: |
#|   Cumulative enrolments to the platform,
#|   vertical lines indicate timing of interim analyses
#| fig-height: 7

# Daily randomisation counts per country, padded with zero-count days up to
# 2022-04-09 and accumulated within country. Built once and shared by `p1`
# and `p3`, which previously each repeated this identical pipeline.
enrol_by_country <- all_data %>%
  filter(ENR_rec == 1, CAssignment != "C0") %>%
  dplyr::count(Country = PT_CountryName, RandDate) %>%
  complete(
    Country,
    RandDate = seq.Date(
      min(RandDate, na.rm = TRUE),
      as.Date("2022-04-09"),
      by = "1 day"
    ),
    fill = list(n = 0)
  ) %>%
  group_by(Country) %>%
  mutate(`Cumulative enrolments` = cumsum(n)) %>%
  ungroup() %>%
  # Fixed level order controls the stacking and legend order.
  mutate(
    Country = factor(
      Country,
      levels = c("India", "Australia", "Nepal", "New Zealand")
    )
  ) %>%
  rename(`Randomisation date` = RandDate)

# Stacked area of cumulative enrolments, filled by country.
p1 <- ggplot(
  enrol_by_country,
  aes(`Randomisation date`, `Cumulative enrolments`, fill = Country)
) +
  # facet_wrap( ~ Country, ncol = 1, scales = "free_y") +
  geom_area(position = "stack") +
  geom_vline(
    data = md,
    aes(xintercept = as.numeric(meet_date)),
    linetype = 2
  ) +
  scale_fill_viridis_d("", begin = 0.2, end = 0.8, option = "B") +
  labs(x = "", subtitle = "Cumulative enrolments", y = "") +
  theme(
    legend.position = "top",
    legend.key.size = unit(0.7, "lines"),
    legend.text = element_text(size = rel(0.8))
  )

# Stacked step-line variant of the same data.
# NOTE(review): `p3` is not used in the composed figure below, and its fill
# scale has no effect because no fill aesthetic is mapped; kept unchanged.
p3 <- ggplot(
  enrol_by_country,
  aes(`Randomisation date`, `Cumulative enrolments`, group = Country)
) +
  # facet_wrap( ~ Country, ncol = 1, scales = "free_y") +
  geom_step(position = "stack") +
  geom_vline(
    data = md,
    aes(xintercept = as.numeric(meet_date)),
    linetype = 2
  ) +
  scale_fill_viridis_d("", begin = 0.2, end = 0.8, option = "B") +
  labs(x = "", subtitle = "Cumulative enrolments", y = "") +
  theme(
    legend.position = "top",
    legend.key.size = unit(0.7, "lines"),
    legend.text = element_text(size = rel(0.8))
  )

# `p2` is the "Available interventions" panel created in the fig-enrolment
# chunk.
p <- p1 / p2
pth <- file.path("outputs", "figures", "baseline")
ggsave(file.path(pth, "enrolment-overall-country.pdf"), p, height = 5, width = 6)
p
```
```{r}
#| label: fig-enrolment-country
#| code-summary: Study enrolment by country
#| fig-cap-location: margin
#| fig-cap: |
#|   Cumulative enrolments to the platform by country,
#|   vertical lines indicate timing of interim analyses
#| fig-height: 10

# Cumulative enrolments per country, one facet per country with its own
# y-axis. Days without randomisations are filled with zero counts up to
# 2022-04-09 before accumulating.
p1 <- all_data %>%
  filter(ENR_rec == 1, CAssignment != "C0") %>%
  dplyr::count(Country = PT_CountryName, RandDate) %>%
  complete(
    Country,
    RandDate = seq.Date(
      min(RandDate, na.rm = TRUE),
      as.Date("2022-04-09"),
      by = "1 day"
    ),
    fill = list(n = 0)
  ) %>%
  group_by(Country) %>%
  mutate(`Cumulative enrolments` = cumsum(n)) %>%
  # Grouping is only needed for the cumulative sum.
  ungroup() %>%
  rename(`Randomisation date` = RandDate) %>%
  ggplot(., aes(`Randomisation date`, `Cumulative enrolments`)) +
  facet_wrap(~ Country, ncol = 1, scales = "free_y") +
  geom_step() +
  geom_vline(
    data = md,
    aes(xintercept = as.numeric(meet_date)),
    linetype = 2
  ) +
  labs(x = "", subtitle = "Cumulative enrolments", y = "")

# `p2` is the "Available interventions" panel created in the fig-enrolment
# chunk; the country panels get 5/7 of the figure height.
p <- (p1 / p2) + plot_layout(heights = c(5, 2))
pth <- file.path("outputs", "figures", "baseline")
ggsave(file.path(pth, "enrolment-country.pdf"), p, height = 6, width = 6)
p
```
```{r}
#| label: enrolment-site
#| code-summary: Study enrolment by site
#| column: screen-inset-shaded
#| layout-nrow: 1

# Cumulative enrolments per site (all countries), shown as an interactive
# plotly figure with one facet per "Country: Site".
p1 <- all_data %>%
  filter(ENR_rec == 1, CAssignment != "C0") %>%
  dplyr::count(Country = PT_CountryName, Site = PT_LocationName, RandDate) %>%
  complete(
    # Only Country/Site pairs that occur in the data are expanded.
    nesting(Country, Site),
    # na.rm = TRUE matches the other enrolment chunks; without it a single
    # missing randomisation date makes min() return NA and seq.Date() fail.
    RandDate = seq.Date(
      min(RandDate, na.rm = TRUE),
      as.Date("2022-04-09"),
      by = "1 day"
    ),
    fill = list(n = 0)
  ) %>%
  group_by(Country, Site) %>%
  mutate(`Cumulative enrolments` = cumsum(n)) %>%
  rename(`Calendar date` = RandDate) %>%
  ggplot(., aes(`Calendar date`, `Cumulative enrolments`)) +
  facet_wrap(
    ~ paste(Country, Site, sep = ": "),
    ncol = 5, scales = "free_y"
  ) +
  geom_step() +
  geom_vline(
    data = md,
    aes(xintercept = as.numeric(meet_date)),
    linetype = 2
  ) +
  # Whole-number breaks only: counts are integers and some sites are small.
  scale_y_continuous(
    breaks = function(x) {
      unique(floor(pretty(seq(0, (max(x) + 1) * 1.1))))
    }
  )
ggplotly(p1, height = 1000)
```
```{r}
#| label: enrolment-india-sites

# Cumulative enrolments for each site in India, one facet per site.
p_india <- all_data %>%
  filter(ENR_rec == 1, CAssignment != "C0", PT_CountryName == "India") %>%
  dplyr::count(Country = PT_CountryName, Site = PT_LocationName, RandDate) %>%
  complete(
    nesting(Country, Site),
    # na.rm = TRUE matches the other enrolment chunks; without it a single
    # missing randomisation date makes min() return NA and seq.Date() fail.
    RandDate = seq.Date(
      min(RandDate, na.rm = TRUE),
      as.Date("2022-04-09"),
      by = "1 day"
    ),
    fill = list(n = 0)
  ) %>%
  group_by(Country, Site) %>%
  mutate(`Cumulative enrolments` = cumsum(n)) %>%
  rename(`Calendar date` = RandDate) %>%
  ggplot(., aes(`Calendar date`, `Cumulative enrolments`)) +
  facet_wrap(~ Site, ncol = 3, scales = "free_y") +
  geom_step() +
  geom_vline(
    data = md,
    aes(xintercept = as.numeric(meet_date)),
    linetype = 2
  ) +
  # Whole-number breaks only: counts are integers and some sites are small.
  scale_y_continuous(
    breaks = function(x) {
      unique(floor(pretty(seq(0, (max(x) + 1) * 1.1))))
    }
  ) +
  scale_x_date(date_breaks = "4 months", date_labels = "%b %Y") +
  theme(strip.text = element_text(size = rel(0.7)))
```
```{r}
#| label: save-india-site-enrolment-figure

# Write the India site enrolment figure to the baseline figures folder.
pth <- file.path("outputs", "figures", "baseline")
ggsave(
  filename = file.path(pth, "enrolment-india-sites.pdf"),
  plot = p_india,
  width = 7,
  height = 5
)
```
```{r}
#| label: enrolment-australia-sites

# Cumulative enrolments for each site in Australia, one facet per site.
p_aus <- all_data %>%
  filter(ENR_rec == 1, CAssignment != "C0", PT_CountryName == "Australia") %>%
  dplyr::count(Country = PT_CountryName, Site = PT_LocationName, RandDate) %>%
  complete(
    nesting(Country, Site),
    # na.rm = TRUE matches the other enrolment chunks; without it a single
    # missing randomisation date makes min() return NA and seq.Date() fail.
    RandDate = seq.Date(
      min(RandDate, na.rm = TRUE),
      as.Date("2022-04-09"),
      by = "1 day"
    ),
    fill = list(n = 0)
  ) %>%
  group_by(Country, Site) %>%
  mutate(`Cumulative enrolments` = cumsum(n)) %>%
  rename(`Calendar date` = RandDate) %>%
  ggplot(., aes(`Calendar date`, `Cumulative enrolments`)) +
  facet_wrap(~ Site, ncol = 3, scales = "free_y") +
  geom_step() +
  geom_vline(
    data = md,
    aes(xintercept = as.numeric(meet_date)),
    linetype = 2
  ) +
  # Whole-number breaks only: counts are integers and some sites are small.
  scale_y_continuous(
    breaks = function(x) {
      unique(floor(pretty(seq(0, (max(x) + 1) * 1.1))))
    }
  ) +
  scale_x_date(date_breaks = "4 months", date_labels = "%b %Y") +
  theme(strip.text = element_text(size = rel(0.7)))
```
```{r}
#| label: save-australia-site-enrolment-figure

# Write the Australia site enrolment figure to the baseline figures folder.
pth <- file.path("outputs", "figures", "baseline")
ggsave(
  filename = file.path(pth, "enrolment-australia-sites.pdf"),
  plot = p_aus,
  width = 7,
  height = 5
)
```
```{r}
#| label: enrolment-nepal-nz-sites

# Daily randomisation counts for the Nepal and New Zealand sites, padded with
# zero-count days over the fixed window 2021-02-08 to 2022-04-09 and
# accumulated within site.
npnz_enrolment <- all_data %>%
  filter(
    ENR_rec == 1,
    CAssignment != "C0",
    PT_CountryName %in% c("Nepal", "New Zealand")
  ) %>%
  dplyr::count(Country = PT_CountryName, Site = PT_LocationName, RandDate) %>%
  complete(
    nesting(Country, Site),
    RandDate = seq.Date(
      as.Date("2021-02-08"),
      as.Date("2022-04-09"),
      by = "1 day"
    ),
    fill = list(n = 0)
  ) %>%
  group_by(Country, Site) %>%
  mutate(`Cumulative enrolments` = cumsum(n)) %>%
  rename(`Calendar date` = RandDate)

# Whole-number y-axis breaks derived from the axis limits.
whole_number_breaks <- function(x) {
  unique(floor(pretty(seq(0, (max(x) + 1) * 1.1))))
}

# One facet per "Country: Site", dashed lines at the interim analysis dates.
p_npnz <- ggplot(
  npnz_enrolment,
  aes(`Calendar date`, `Cumulative enrolments`)
) +
  facet_wrap(
    ~ paste(Country, Site, sep = ": "),
    ncol = 3, scales = "free_y"
  ) +
  geom_step() +
  geom_vline(
    data = md,
    aes(xintercept = as.numeric(meet_date)),
    linetype = 2
  ) +
  scale_y_continuous(breaks = whole_number_breaks) +
  scale_x_date(date_breaks = "4 months", date_labels = "%b %Y") +
  theme(strip.text = element_text(size = rel(0.7)))
```
```{r}
#| label: save-nepal-nz-site-enrolment-figure

# Write the Nepal / New Zealand site enrolment figure to the baseline
# figures folder.
pth <- file.path("outputs", "figures", "baseline")
ggsave(
  filename = file.path(pth, "enrolment-nepalnz-sites.pdf"),
  plot = p_npnz,
  width = 7,
  height = 3
)
```
## Intervention Assignments
```{r}
#| label: tbl-intervention-assignments-anticoagulation
#| tbl-cap: Counts of intervention assignments to anticoagulation
#| tbl-cap-location: margin

# Count participants by analysis-set flags and assigned intervention after
# applying the project's `filter_fas_itt()` filter, then render the counts
# as a striped HTML table.
assignment_counts <- dplyr::count(
  filter_fas_itt(all_data),
  FAS_ITT, ACS_ITT, CAssignment
)
kable_styling(
  kable(assignment_counts),
  bootstrap_options = "striped",
  font_size = 14
)
```
```{r}
#| label: fig-intervention-assignment-overtime
#| code-summary: Intervention allocations over time
#| fig-cap: |
#|   Allocation to interventions by calendar date.

# Cumulative number of allocations per intervention by randomisation date.
# Unlike the enrolment figures, "C0" assignments are not filtered out here.
p1 <- all_data %>%
  filter(ENR_rec == 1) %>%
  dplyr::count(Intervention = CAssignment, RandDate) %>%
  complete(
    Intervention,
    RandDate = seq.Date(
      min(RandDate, na.rm = TRUE),
      as.Date("2022-04-09"),
      by = "1 day"
    ),
    fill = list(n = 0)
  ) %>%
  group_by(Intervention) %>%
  mutate(cn = cumsum(n)) %>%
  ungroup() %>%
  ggplot(., aes(RandDate, cn)) +
  geom_step(aes(colour = Intervention)) +
  scale_color_viridis_d(option = "D") +
  labs(x = "", y = "Cumulative allocation")

# `p2` is the "Available interventions" panel created in the fig-enrolment
# chunk.
p1 / p2
```
### Country
Allocations to interventions by country of enrolment.
```{r}
#| label: tbl-allocation-country
#| code-summary: Allocations by country
#| tbl-cap: |
#|   Allocation to anti-coagulation interventions by country

# Count (and within-country proportion) of participants per intervention,
# formatted as "n (p)" and laid out with one row per country and one column
# per intervention.
all_data %>%
  filter(ENR_rec == 1) %>%
  dplyr::count(
    Country = PT_CountryName,
    Intervention = factor(
      CAssignment,
      labels = intervention_labels2()$CAssignment
    )
  ) %>%
  group_by(Country) %>%
  mutate(p = n / sum(n)) %>%
  mutate(lab = sprintf("%i (%.2f)", n, p)) %>%
  select(-n, -p) %>%
  ungroup() %>%
  # pivot_wider() replaces the superseded spread(); names_sort = TRUE keeps
  # the columns in factor-level order as spread() did, and values_fill
  # supplies the label for country/intervention pairs with no allocations.
  pivot_wider(
    names_from = Intervention,
    values_from = lab,
    values_fill = "0 (0.00)",
    names_sort = TRUE
  ) %>%
  kable(align = "lrrrrr") %>%
  kable_styling(bootstrap_options = "striped", font_size = 12)
```
```{r}
#| label: fig-allocation-country
#| code-summary: Allocations by country
#| fig-cap: |
#|   Allocation to anti-coagulation interventions by country
#| fig-height: 8

# Short intervention labels, with the first one replaced by "Not randomised".
short_labels <- c(
  "Not randomised",
  intervention_labels_short()$CAssignment[-1]
)

# Allocation counts per country and intervention; `p` is the within-country
# proportion (carried in the data, the bars show the raw count `n`).
alloc_by_country <- all_data %>%
  filter(ENR_rec == 1) %>%
  dplyr::count(
    Country,
    Intervention = factor(CAssignment, labels = short_labels)
  ) %>%
  group_by(Country) %>%
  mutate(p = n / sum(n))

# Whole-number y-axis breaks derived from the axis limits.
whole_number_breaks <- function(x) {
  unique(floor(pretty(seq(0, (max(x) + 1) * 1.1))))
}

# One bar chart per country, each with its own y-axis.
p <- ggplot(alloc_by_country, aes(Intervention, n)) +
  facet_wrap(~ Country, scales = "free_y", ncol = 4) +
  geom_col() +
  scale_y_continuous("Frequency", breaks = whole_number_breaks) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.25))

pth <- file.path("outputs", "figures", "baseline")
ggsave(
  filename = file.path(pth, "allocations-country.pdf"),
  plot = p,
  width = 7,
  height = 2.5
)
p
```
### Site
Allocations to interventions by site of enrolment.
```{r}
#| label: fig-allocation-site
#| code-summary: Allocations by site
#| fig-cap: |
#|   Allocation to anti-coagulation interventions by site
#| fig-height: 8

# Short intervention labels, with the first one replaced by "Not randomised".
short_labels <- c(
  "Not randomised",
  intervention_labels_short()$CAssignment[-1]
)

# Allocation counts per site and intervention. Sites are relabelled as
# "Country: Site" before grouping; `p` is the within-site proportion
# (carried in the data, the bars show the raw count `n`).
alloc_by_site <- all_data %>%
  filter(ENR_rec == 1) %>%
  dplyr::count(
    Country,
    Site = Location,
    Intervention = factor(CAssignment, labels = short_labels)
  ) %>%
  mutate(Site = paste(Country, Site, sep = ": ")) %>%
  group_by(Site) %>%
  mutate(p = n / sum(n))

# Whole-number y-axis breaks derived from the axis limits.
whole_number_breaks <- function(x) {
  unique(floor(pretty(seq(0, (max(x) + 1) * 1.1))))
}

# One bar chart per site, each with its own y-axis.
p <- ggplot(alloc_by_site, aes(Intervention, n)) +
  facet_wrap(~ Site, scales = "free_y") +
  geom_col() +
  scale_y_continuous("Frequency", breaks = whole_number_breaks) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.25))

pth <- file.path("outputs", "figures", "baseline")
ggsave(
  filename = file.path(pth, "allocations-sites.pdf"),
  plot = p,
  width = 7,
  height = 6
)
p
```
### Calendar Time
```{r}
#| label: fig-allocation-epoch
#| code-summary: Allocations by epoch
#| fig-cap: |
#|   Distribution of allocations to anti-coagulation interventions
#|   by calendar month
#| fig-height: 8

# Share of each intervention among the allocations made in each calendar
# month, shown as stacked bars with one facet per year.
p <- all_data %>%
  filter(ENR_rec == 1) %>%
  dplyr::count(
    yr = year(RandDate),
    mth = month(RandDate),
    Intervention = factor(
      CAssignment,
      labels = c(
        "Not randomised",
        intervention_labels_short()$CAssignment[-1]
      )
    )
  ) %>%
  group_by(yr, mth) %>%
  # Proportion of the month's allocations going to each intervention.
  mutate(p = n / sum(n)) %>%
  ungroup() %>%
  ggplot(., aes(mth, p, fill = Intervention)) +
  # Free x scale and space: a year's facet is only as wide as its months.
  facet_grid(~ yr, drop = TRUE, scales = "free_x", space = "free") +
  geom_col() +
  scale_y_continuous("Distribution of\ninterventions") +
  scale_fill_viridis_d(option = "B") +
  labs(x = "Calendar date (month of year)") +
  scale_x_continuous(breaks = 1:12) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.25))

pth <- file.path("outputs", "figures", "baseline")
ggsave(file.path(pth, "allocations-calendar.pdf"), p, height = 2.5, width = 7)
p
```
# Baseline Factors
The following provides an overview of the baseline covariates collected for individuals.
## Demographics
Baseline demographics are summarised by intervention in the following table.
```{r}
#| label: tbl-demographics-table
#| code-summary: Demographics table
#| tbl-cap: |
#|   Baseline demographics for participants randomised into
#|   the anticoagulation domain.

# Intervention labels without the first entry.
# NOTE(review): the first label is presumably the not-randomised arm, which
# `filter_acs_itt()` would exclude -- confirm against the helpers.
acs_labels <- intervention_labels()$CAssignment[-1]

# Analysis data with labelled interventions; `sdat` is reused by the chunk
# that saves the LaTeX version of this table.
sdat <- mutate(
  filter_acs_itt(all_data),
  CAssignment = factor(CAssignment, labels = acs_labels)
)
generate_baseline_demographics_table(sdat, format = "html")
```
```{r}
#| label: save-baseline-demographics-table
#| code-summary: Save table to outputs

# Re-render the demographics table as LaTeX from the current `sdat` and hand
# it to the project's table writer.
demographics_tex <- generate_baseline_demographics_table(sdat, format = "latex")
save_tex_table(demographics_tex, "baseline/demographics")
```
## Co-morbidities
Baseline co-morbidities are summarised by anti-coagulation intervention in the following table.
```{r}
#| label: tbl-comorbidities-table
#| code-summary: Co-morbidities table
#| tbl-cap: |
#|   Baseline co-morbidities for participants randomised into
#|   the anticoagulation domain.

# Analysis data with labelled interventions (the first label is dropped);
# `sdat` is reused by the chunk that saves the LaTeX version of this table.
sdat <- all_data %>%
  filter_acs_itt() %>%
  mutate(
    CAssignment = factor(
      CAssignment,
      labels = intervention_labels()$CAssignment[-1]
    )
  )
generate_baseline_comorbidities_table(sdat, format = "html")
```
```{r}
#| label: save-baseline-comorbidities-table
#| code-summary: Save table to outputs

# Re-render the co-morbidities table as LaTeX from the current `sdat` and
# hand it to the project's table writer.
comorbidities_tex <- generate_baseline_comorbidities_table(sdat, format = "latex")
save_tex_table(comorbidities_tex, "baseline/comorbidities")
```
## Prognostics
Baseline prognostics are summarised by anti-coagulation intervention in the following table.
```{r}
#| label: tbl-prognostics-table
#| code-summary: Prognostics table
#| tbl-cap: |
#|   Baseline prognostic variables for
#|   participants randomised into anticoagulation domain.

# Intervention labels without the first entry.
acs_labels <- intervention_labels()$CAssignment[-1]

# Analysis data with labelled interventions; `sdat` is reused by the chunk
# that saves the LaTeX version of this table.
sdat <- mutate(
  filter_acs_itt(all_data),
  CAssignment = factor(CAssignment, labels = acs_labels)
)
generate_baseline_prognostics_table(sdat, format = "html")
```
```{r}
#| label: save-baseline-prognostics-table
#| code-summary: Save table to outputs

# Re-render the prognostics table as LaTeX from the current `sdat` and hand
# it to the project's table writer.
prognostics_tex <- generate_baseline_prognostics_table(sdat, format = "latex")
save_tex_table(prognostics_tex, "baseline/prognostics")
```
## Age
```{r}
#| label: fig-age-hist
#| code-summary: Histogram of age
#| fig-cap: |
#|   Distribution of age

# Bin edges: a first bin from 18 to 20, then 5-year bins up to 100.
age_breaks <- c(18, seq(20, 100, by = 5))

# Histogram of age at entry; closed = "left" makes each bin include its
# lower edge and exclude its upper edge.
p_age <- ggplot(filter_acs_itt(all_data), aes(AgeAtEntry)) +
  geom_histogram(breaks = age_breaks, colour = "white", closed = "left") +
  scale_x_continuous(breaks = seq(20, 95, by = 5)) +
  labs(
    x = "Age at randomisation (5-year bins, 30 to 34, 35 to 39, etc.)",
    y = "Count"
  )

pth <- file.path("outputs", "figures", "baseline")
ggsave(
  filename = file.path(pth, "age-overall.pdf"),
  plot = p_age,
  width = 4,
  height = 2
)
p_age
```
```{r}
#| label: fig-age-by-cassignment
#| code-summary: Histogram of age by anti-coagulation intervention
#| fig-cap: |
#|   Distribution of age by anti-coagulation intervention
#| fig-cap-location: bottom

# Bin edges: a first bin from 18 to 20, then 5-year bins up to 100.
age_breaks <- c(18, seq(20, 100, by = 5))

# Age histograms in a two-column grid, one facet per assigned intervention,
# each with its own y-axis.
p <- ggplot(filter_acs_itt(all_data), aes(AgeAtEntry)) +
  facet_wrap(~ CAssignment, scales = "free_y", ncol = 2) +
  geom_histogram(breaks = age_breaks, colour = "white", closed = "left") +
  scale_x_continuous(breaks = seq(20, 95, by = 5)) +
  labs(
    x = "Age at randomisation (5-year bins, 30 to 34, 35 to 39, etc.)",
    y = "Count"
  )
p
```
## Weight
```{r}
#| label: fig-weight-hist
#| code-summary: Histogram of weight
#| fig-cap: |
#|   Distribution of weight

# Records with a non-missing weight measurement type.
weight_dat <- all_data %>%
  filter_acs_itt() %>%
  filter(!is.na(BAS_WeightMeasurement)) %>%
  # 2 with weight reported as 0, make NA
  mutate(BAS_Weight = if_else(BAS_Weight == 0, NA_real_, BAS_Weight))

# Weight histograms, one facet per value of `BAS_WeightMeasurement`.
p <- ggplot(weight_dat, aes(BAS_Weight)) +
  facet_wrap(~ BAS_WeightMeasurement, scales = "free_y") +
  geom_histogram()
p
```
## Date since first symptoms
```{r}
#| fig-cap: |
#|   Days between events for hospitalisation, randomisation,
#|   symptom onset, and first positive test.
#| echo: false

# Intervals between key dates, expressed as numbers.
# NOTE(review): the unit is days only if the date columns are `Date` objects,
# as the caption assumes -- confirm.
tsfs_dat <- all_data %>%
  filter_acs_itt() %>%
  transmute(
    CAssignment,
    RandDate,
    EL_AdmittedToHospital,
    EL_FirstSymptoms,
    EL_FirstPositiveTest,
    fs_to_hosp = as.numeric(EL_AdmittedToHospital - EL_FirstSymptoms),
    hosp_to_rand = as.numeric(RandDate - EL_AdmittedToHospital),
    fs_to_rand = as.numeric(RandDate - EL_FirstSymptoms),
    pt_to_rand = as.numeric(RandDate - EL_FirstPositiveTest)
  )

# One bar chart per interval. This overwrites the `p1`, `p2` and `p3`
# objects created by earlier chunks.
p1 <- ggplot(tsfs_dat, aes(fs_to_hosp)) +
  geom_bar() +
  labs(x = "First symptoms to hospitalisation", y = "Frequency")
p2 <- ggplot(tsfs_dat, aes(fs_to_rand)) +
  geom_bar() +
  labs(x = "First symptoms to randomisation", y = "Frequency")
p3 <- ggplot(tsfs_dat, aes(hosp_to_rand)) +
  geom_bar() +
  labs(x = "Hospitalisation to randomisation", y = "Frequency")
p4 <- ggplot(tsfs_dat, aes(pt_to_rand)) +
  geom_bar() +
  labs(x = "Positive test to randomisation", y = "Frequency")

# Arrange the four panels in a 2 x 2 grid (patchwork), save and display.
p <- (p1 | p2) / (p3 | p4)
pth <- file.path("outputs", "figures", "baseline")
ggsave(file.path(pth, "days-between-events-overall.pdf"), p, height = 3, width = 6)
p
```
# Drugs Received During Hospital Stay
```{r}
#| label: tbl-drugs-table
#| code-summary: Drugs during hospital stay table
#| tbl-cap: |
#|   Drugs received during hospital stay for participants randomised into
#|   the anticoagulation domain.

# Intervention labels without the first entry.
acs_labels <- intervention_labels()$CAssignment[-1]

# Analysis data with labelled interventions; `sdat` is reused by the chunk
# that saves the LaTeX version of this table.
sdat <- mutate(
  filter_acs_itt(all_data),
  CAssignment = factor(CAssignment, labels = acs_labels)
)
generate_discharge_drugs_table(sdat, format = "html")
```
```{r}
#| label: save-drugs-table
#| code-summary: Save table to outputs

# Re-render the drugs table as LaTeX from the current `sdat` and hand it to
# the project's table writer.
drugs_tex <- generate_discharge_drugs_table(sdat, format = "latex")
save_tex_table(drugs_tex, "baseline/drugs")
```